# Date: 2021-08-11
# Author: Bao Fahu (鲍发户)
# Title: girl photo crawler
import os
import time
from urllib import request
from urllib.parse import urljoin, urlparse

import requests
from lxml import etree

# Crawl scope: site root, HTTP request headers, and the paginated listing URL.
BASE_DOMAIN = 'http://www.girl13.com/'

# Browser-like User-Agent sent with the listing-page requests.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.124 Safari/537.36'
    ),
}

# Listing-page URL template; '{}' is filled with the 1-based page number.
START_URLS = BASE_DOMAIN + 'page/{}/'


# Crawl every listing page in the requested range
def get_all_urls(start_page=1, end_page=606, delay=3):
    """Crawl listing pages ``start_page`` through ``end_page`` (inclusive).

    Each page URL is built from ``START_URLS`` and handed to ``crawl_url``.
    Nothing is returned; any result comes from ``crawl_url``'s side effects.

    Args:
        start_page: First page number to crawl (default 1).
        end_page: Last page number to crawl, inclusive (default 606, the
            range the script was originally hard-coded to).
        delay: Seconds to wait between consecutive pages (default 3).
            A value of 0 or less disables the pause.

    An empty range (``end_page < start_page``) is a no-op.
    """
    for page in range(start_page, end_page + 1):
        crawl_url(START_URLS.format(page))
        # Throttle between pages; waiting after the final page is pointless.
        if delay > 0 and page < end_page:
            time.sleep(delay)


def _image_filename(img_url):
    """Return the last path component of ``img_url`` ('' if there is none)."""
    # Use the URL *path* only, so the host, query string and fragment can
    # never leak into the local file name.
    return os.path.basename(urlparse(img_url).path)


# Parse one listing page and download the images it references
def crawl_url(url, save_dir=r"E:\imgs", timeout=10):
    """Download every image referenced by the listing page at ``url``.

    The page is fetched with ``HEADERS``, parsed with lxml, and each ``src``
    attribute found under ``div#loop-square`` paragraphs is downloaded into
    ``save_dir`` under the image's own file name.

    Args:
        url: Address of the listing page to scrape.
        save_dir: Directory the images are written to; created if missing.
            Defaults to the image folder on the E: drive that the script
            originally wrote to.
        timeout: Per-request timeout in seconds, so a stalled connection
            cannot hang the crawl indefinitely.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched (connection error or timeout). Failures on individual
            images are reported and skipped instead.
    """
    resp = requests.get(url, headers=HEADERS, timeout=timeout)
    if not resp.ok:
        # An error page contains no listing; do not try to parse it.
        print('skipped page {}: HTTP {}'.format(url, resp.status_code))
        return

    # Tolerate stray bytes instead of aborting the page on one bad character.
    text = resp.content.decode('utf-8', errors='replace')
    tree = etree.HTML(text)
    if tree is None:
        return

    os.makedirs(save_dir, exist_ok=True)
    for src in tree.xpath("//div[@id='loop-square']//p//@src"):
        # Resolve relative and protocol-relative sources against the page.
        img_url = urljoin(url, src)
        name = _image_filename(img_url)
        if not name:
            continue
        try:
            # Send the same headers as for the page request.
            img_resp = requests.get(img_url, headers=HEADERS, timeout=timeout)
            img_resp.raise_for_status()
            with open(os.path.join(save_dir, name), 'wb') as fh:
                fh.write(img_resp.content)
        except (requests.RequestException, OSError) as exc:
            # One broken image must not abort the rest of the page.
            print('skipped image {}: {}'.format(img_url, exc))


# Script entry point: start the crawl only when this file is run directly,
# not when it is imported as a module.
if __name__ == '__main__':
    get_all_urls()
